import lxml.etree as le

import urllib.request as ur

def get_web_html(url="https://edu.csdn.net/", file="edu.html"):
    """Download *url* and save the raw response body to *file*.

    Args:
        url: Address of the page to fetch.
        file: Path of the local file that receives the response bytes;
            an existing file is overwritten.

    Raises:
        urllib.error.URLError: If the request fails (raised by ``urlopen``).
        OSError: If *file* cannot be opened for writing.
    """
    request = ur.Request(url)
    # Close the response deterministically instead of leaving the
    # connection open until the object happens to be garbage collected.
    with ur.urlopen(request) as response:
        content = response.read()

    # The bytes are written unchanged, so the file keeps whatever encoding
    # the server sent.
    with open(file, "wb") as f:
        f.write(content)

def parse_html_content(file="edu.html"):
    """Extract the two-level category listing from a saved HTML page.

    Every ``<div class="classify_cList">`` element contributes one entry:
    the text of its ``h3/a`` link is the top-level category and the texts
    of its ``div/span/a`` links are the sub-categories.

    Args:
        file: Path of the UTF-8 encoded HTML file to parse.

    Returns:
        A list of dicts, each with the keys ``category1`` (heading text)
        and ``category2_s`` (list of sub-category texts). The list is empty
        when the file is blank or contains no matching elements.
    """
    # Read the local copy of the page; the file is closed before parsing.
    with open(file, "r", encoding="utf-8") as f:
        html = f.read()

    # A blank document has nothing to parse.
    if not html.strip():
        return []

    # Build an element tree from the HTML text so it can be queried
    # with XPath.
    html_x = le.HTML(html)

    data_s = []
    for div_x in html_x.xpath('//div[@class="classify_cList"]'):
        headings = div_x.xpath("./h3/a/text()")
        if not headings:
            # No heading link in this div: skip it rather than fail with
            # IndexError on the empty result list.
            continue
        data_s.append(dict(
            category1=headings[0],
            category2_s=div_x.xpath("./div/span/a/text()"),
        ))
    return data_s

if __name__ == "__main__":
    # Fetch the page into the default local file, then parse that file.
    # get_web_html() has no return value, so its result is not bound.
    get_web_html()
    data_s = parse_html_content()
    for data in data_s:
        print(data.get("category1"))
        # Sub-categories are printed indented beneath their parent category.
        for category2 in data.get("category2_s"):
            print(" ", category2)